/*************************************************************************
 * The contents of this file are subject to the MYRICOM MYRINET          *
 * EXPRESS (MX) NETWORKING SOFTWARE AND DOCUMENTATION LICENSE (the       *
 * "License"); User may not use this file except in compliance with the  *
 * License.  The full text of the License can found in LICENSE.TXT       *
 *                                                                       *
 * Software distributed under the License is distributed on an "AS IS"   *
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See  *
 * the License for the specific language governing rights and            *
 * limitations under the License.                                        *
 *                                                                       *
 * Copyright 2005 by Myricom, Inc.  All rights reserved.                 *
 *************************************************************************/

/* Revision identification string kept in the object file: "@(#)" is the
 * marker searched for by the SCCS what(1) utility and "$Id: ... $" is an
 * RCS/CVS keyword.  It is not referenced by any code visible in this file. */
static const char __idstring[] = "@(#)$Id: mx_connect.c,v 1.90 2006/12/14 07:55:23 loic Exp $";

#include "mx_auto_config.h"
#include "myriexpress.h"
#include "mx_extensions.h"
#include "mx__lib_types.h"
#  include "mx__partner.h"
#include "mx__endpoint.h"
#include "mx_connect.h"
#include "mx_byteswap.h"
#include "mx__request.h"
#include "mx__lib.h"
#include "mx__driver_interface.h"
#  include "mx_stbar.h"
#include "mx__requests.h"
#include "mx__error.h"
#include "mx__sleep.h"
#include "mx__shmem.h"
#include "mx__wait_queue.h"

/*
 * Process a connect request event received from a remote endpoint.
 *
 * The sending partner is looked up, a CONNECT_REPLY request is allocated,
 * filled in and queued on ep->resend_reqq.  When the application key
 * carried by the event matches ep->connect.key, the partner's session and
 * sequence-number state is (re)initialized and the reply carries
 * MX_STATUS_SUCCESS; otherwise the reply carries MX_STATUS_BAD_KEY and the
 * partner state is not modified.
 *
 * Events whose source peer index is MX_UNKNOWN_SRC_PEER_INDEX are dropped
 * (with a message when verbose).
 */
void
mx__handle_connect(struct mx_endpoint *ep, mcp_uevt_connect_t *evt)
{
  struct mx__connect_data *conn = MX__CONNECT_DATA(evt);
  /* fields suffixed _n are kept exactly as found in the event */
  uint32_t session_id_n = conn->dest_session_n;
  uint32_t app_key_n = conn->app_key_n;
  uint32_t connect_seqnum = conn->connect_seqnum_n;
  uint16_t src_peer_index = ntohs(evt->src_peer_index);
  uint16_t send_seq = ntohs(conn->seqnum_start_n);
  struct mx__partner *partner;
  union mx_request *reply;

  if (unlikely(src_peer_index == (uint16_t) MX_UNKNOWN_SRC_PEER_INDEX)) {
    if (mx__opt.verbose >= 1)
      mx_printf("received connect request with unknown source peer index\n");
    return;
  }

  partner = mx__endpoint_lookup_partner(ep, evt->src_endpt, src_peer_index);
  mx_fixme_assert(partner);

  reply = mx__rl_alloc(ep);
  mx_fixme_assert(reply);
  memset(reply, 0, sizeof(*reply));

  if (ntohl(app_key_n) != ep->connect.key) {
    /* wrong key: refuse, and leave the partner state alone */
    reply->connect.status_code_n = MX_STATUS_BAD_KEY;
  } else {
    /* Before connecting the partner, check whether it was previously
     * connected with a different session. */
    if (partner->best_session_n != -1
	&& partner->best_session_n != session_id_n
	&& partner->connect_session_n != session_id_n) {
      /* this is a reconnect from a new instance of the partner */
      if (mx__opt.verbose) {
	mx_printf("Received connect with new session id from peer ");
	mx__print_partner(partner);
	mx_printf(", need to cleanup pending requests\n");
      }
      /* See the bottom of mx__partner.c for documentation about
       * disconnection. */
      mx__partner_cleanup(ep, partner, 0);
    }

    /* best_session_n is read again here, after the possible cleanup */
    if (partner->best_session_n != session_id_n) {
      partner->send_seq = send_seq;
      partner->send_acked = MX__SEQNO(partner->send_seq);
    }

    /* record the session and connect sequence number of this request */
    partner->connect_session_n = session_id_n;
    partner->best_session_n = session_id_n;
    partner->connect_recvseq = connect_seqnum;
    partner->recv_acked = partner->recv_seq;
    partner->fully_recv_seq = partner->recv_seq;

    reply->connect.seqnum_start = partner->recv_seq;
    reply->connect.status_code_n = MX_STATUS_SUCCESS;
    reply->connect.app_key_n = ep->endpoint_sid_n;
  }

  /* fields common to successful and refused replies */
  reply->connect.basic.type = MX__REQUEST_TYPE_CONNECT_REPLY;
  reply->connect.basic.state = MX__REQUEST_STATE_SEND_QUEUED;
  reply->connect.basic.mcp_handle = -1;
  reply->connect.basic.wq = NULL;
  reply->connect.basic.partner = partner;
  reply->connect.peer_endpoint_id = evt->src_endpt;
  reply->connect.peer_index_n = evt->src_peer_index;
  reply->connect.dest_session_n = session_id_n;
  reply->connect.connect_seqnum_n = connect_seqnum;

  /* Enqueue in resend_reqq because we don't want the reply to be stuck
   * behind resource starvation of other message types.
   * This is required for parity recovery, since the recovering node
   * needs a connect reply to reset the src_peer_index so that
   * communication may restart. */
  mx__enqueue_request(&ep->resend_reqq, reply);

  /* We cannot call mx__luigi here.  This is not a problem because
   * mx__process_requests is currently done after mx_process_events,
   * and even if it were not, somebody will eventually call mx__luigi. */
}

/*
 * Tell whether the connect reply described by evt/conn_data answers the
 * pending connect request q: same connect sequence number, addressed to
 * our session, and coming from the endpoint and peer q was sent to.
 */
static int
mx__connect_reply_matches(struct mx_endpoint *ep, union mx_request *q,
			  mcp_uevt_connect_t *evt,
			  struct mx__connect_data *conn_data)
{
  if (q->connect.connect_seqnum_n != conn_data->connect_seqnum_n)
    return 0;
  if (conn_data->dest_session_n != ep->endpoint_sid_n)
    return 0;
  if (evt->src_endpt != q->connect.peer_endpoint_id)
    return 0;
  return evt->src_peer_index == q->connect.peer_index_n;
}

/*
 * Process a connect reply event.
 *
 * The matching CONNECT request is searched for in ep->resend_list and then
 * in ep->mcp_connectq; a reply without a matching request is dropped (with
 * a message when verbose).  If the request does not have the
 * MX__REQUEST_STATE_MCP flag set it is removed from the resend list and
 * completed with the status carried by the reply; otherwise it is only
 * flagged as REPLIED.  On a successful reply carrying a session id the
 * partner did not have yet, the partner's session and send sequence state
 * are updated.
 */
void
mx__handle_connect_reply(struct mx_endpoint *ep, mcp_uevt_connect_t *evt)
{
  struct mx__connect_data *conn_data = MX__CONNECT_DATA(evt);
  struct mx__request_queue_head * elt;
  struct mx__partner *partner;
  union mx_request *q;
  uint16_t src_peer_index;
  int count = 0;	/* connect requests inspected, for the orphan message */

  /* Theoretically there is a small chance the connect request might be
   * in the middle of being requeued; in the worst case we will have to
   * resend it one more time. */
  MX__FOREACH_REQ(q, elt, &ep->resend_list) {
    if (q->basic.type != MX__REQUEST_TYPE_CONNECT)
      continue;
    count++;
    if (mx__connect_reply_matches(ep, q, evt, conn_data))
      goto found;
  }
  MX__FOREACH_REQ(q, elt, &ep->mcp_connectq) {
    mx_assert (q->basic.type == MX__REQUEST_TYPE_CONNECT);
    count++;
    if (mx__connect_reply_matches(ep, q, evt, conn_data))
      goto found;
  }

  if (mx__opt.verbose)
    mx_printf("orphan connect reply:%x,queued=%d\n", ntohs(conn_data->connect_seqnum_n),count);
  return;

found:

  src_peer_index = ntohs(q->connect.peer_index_n);
  if (unlikely(src_peer_index == (uint16_t) MX_UNKNOWN_SRC_PEER_INDEX)) {
    if (mx__opt.verbose >= 1)
      mx_printf("receive packet with unknown source peer index\n");
    return;
  }
  partner = mx__endpoint_lookup_partner(ep, q->connect.peer_endpoint_id,
					src_peer_index);
  mx_fixme_assert(partner);

  mx_assert(!(q->connect.basic.state & MX__REQUEST_STATE_SEND_QUEUED));
  if (q->connect.basic.state & MX__REQUEST_STATE_MCP) {
    /* MCP flag still set: only remember that the reply has arrived */
    q->connect.basic.state |= MX__REQUEST_STATE_REPLIED;
  } else {
    mx__spliceout_request(&ep->resend_list, q);
    mx__connect_complete(ep, q, conn_data->status_code);
    /* q is not used past this point */
  }

  /* only use the result if this is a real new session */
  if (conn_data->status_code != MX_STATUS_SUCCESS)
    return;
  if (partner->endpoint_sid_n == conn_data->app_key_n)
    return;

  if (conn_data->app_key_n != partner->connect_session_n) {
    mx_assert(conn_data->app_key_n != partner->best_session_n);
    mx__partner_cleanup(ep, partner, 0);
  }
  /* best_session_n is read again here, after the possible cleanup */
  if (conn_data->app_key_n != partner->best_session_n) {
    partner->send_seq = ntohs(conn_data->seqnum_start_n);
    partner->send_acked = MX__SEQNO(partner->send_seq);
  }
  partner->endpoint_sid_n = conn_data->app_key_n;
  partner->best_session_n = conn_data->app_key_n;
}

/*
 * Initialize the connect state of an endpoint: remember the key that
 * incoming connect requests are compared against.
 * The param/params_count arguments are accepted but not consulted.
 */
void
mx__init_connect(struct mx_endpoint *ep, uint32_t endpoint_key, mx_param_t *param, uint32_t params_count )
{
  (void) param;
  (void) params_count;

  ep->connect.key = endpoint_key;
}

/*
 * Counterpart of mx__init_connect.  The connect state set up there holds
 * no resources, so there is nothing to do.
 */
void
mx__end_connect(struct mx_endpoint *ep)
{
  (void) ep;
}

/*
 * Complete the connect request q with the given status.
 *
 * Completion statistics are updated and the application key used by the
 * request is recorded in the partner.  Then:
 *  - if the request is still owned by the user (not DEAD), it is flagged
 *    COMPLETED, its status code is set from status_code unless an error
 *    was already recorded, and the waiter (if any) is notified.
 *    Synchronous requests are accounted in ep->connect_count; asynchronous
 *    ones are handed to mx__notify_peeker_request_done with the context id
 *    derived from their match_info.
 *  - if the request was forgotten (DEAD), an already recorded error is
 *    reported unless the endpoint is cancelled, and the request is freed.
 *
 * q must not be used by the caller after this function returns, since it
 * may have been freed.
 */
void
mx__connect_complete(struct mx_endpoint *ep, union mx_request *q, mx_status_code_t status_code)
{
  MX__EP_STATS_INC(ep, completion);
  if (ep->in_progression_thread)
    MX__EP_STATS_INC(ep, overlapped_completion);

  q->basic.partner->app_key_n = q->connect.app_key_n;

  if (!(q->connect.basic.state & MX__REQUEST_STATE_DEAD)) {
    /* wake up the waiter if any */
    q->connect.basic.state |= MX__REQUEST_STATE_COMPLETED;

    /* status.code is an mx_status_code_t, so compare it against the
     * status enum (as the forgotten-request branch below does) rather
     * than against the mx_return_t constant MX_SUCCESS */
    if (q->connect.basic.status.code == MX_STATUS_SUCCESS) {
      /* only set the status if it is not already set to an error */
      q->connect.basic.status.code = mx__error_req(ep, "connect", q, status_code);
    }

    mx__notify_waiter_request_done(ep, q);

    if (q->connect.is_synchronous) {
      /* connect requests are in no queue between the reply and the completion */
      ep->connect_count++;
    } else {
      /* add to doneq and wakeup peekers if asynchronous */
      uint64_t match_info = q->basic.status.match_info;
      uint32_t ctxid = CTXID_FROM_MATCHING(ep, match_info);
      mx__notify_peeker_request_done(ep, q, ctxid);
    }
  } else {
    /* mx_forget was called */
    mx_status_code_t status = q->connect.basic.status.code;
    if (status != MX_STATUS_SUCCESS) {
      if (!ep->cancelled) {
	/* only handle errors when the endpoint is totally open */
	mx__error_req(ep, "connect request (already completed)", q, status);
      }
    }
    mx__rl_free(ep, q);
  }
}

static mx_return_t
mx__connect_common(struct mx_endpoint *ep, uint64_t nic_id, uint32_t eid,
		   uint32_t key, union mx_request * q)
{
  struct mx__partner *partner;
  mx_lookup_peer_t lookup;
  mx_return_t rc;

  q->connect.basic.wq = NULL;
  q->connect.basic.type = MX__REQUEST_TYPE_CONNECT;
  q->connect.basic.state = 0;
  q->connect.basic.requeued = 0;
  q->connect.basic.acquired_by_wait_any = 0;

  if (nic_id == ep->myself->nic_id) {
    lookup.index = ntohs(ep->myself->peer_index_n);
  } else {
    lookup.board_number = ep->board_num;
    lookup.nic_id = nic_id;
    rc = mx__nic_id_to_peer_index(ep->handle, &lookup);
    if (rc != MX_SUCCESS) {
      char s[18];
      mx__nic_id_to_str(s, nic_id, 18);
      return mx__error(ep, "mx__connect_common(%s)",
		       MX_NIC_NOT_FOUND, s, eid);
    }
  }
  mx_assert(lookup.index < 0xffff);
  partner = mx__endpoint_lookup_partner(ep, eid, lookup.index);
  mx_fixme_assert(partner);
  mx__partner_to_addr(partner, &q->basic.status.source);

  /* make sure this is initialized for self and shmem so that request dump is correct */
  q->connect.basic.partner = partner;
  q->connect.peer_endpoint_id = partner->eid;
  q->connect.peer_index_n = partner->peer_index_n;
  q->connect.dest_session_n = ep->endpoint_sid_n;
  q->connect.app_key_n = htonl(key);
  q->connect.connect_seqnum_n = ++partner->connect_sendseq;
  q->connect.seqnum_start = partner->recv_seq;

  if (!mx__opt.disable_self && partner == ep->myself) {
    mx__connect_complete (ep, q, key == ep->connect.key ? MX_STATUS_SUCCESS : MX_STATUS_BAD_KEY);
    return MX_SUCCESS;
  }
#if MX_USE_SHMEM
  if (!mx__opt.disable_shmem && lookup.index == ntohs(ep->myself->peer_index_n)) {
    struct mx__shm_peer *peer = ep->shm->peers + partner->eid;
    mx_status_code_t status_code = MX_STATUS_SUCCESS;
    mx__shm_forget_peer(ep, partner->eid);
    peer->snd_shmq = mx__shm_open(ep, partner->eid, 0, 0);
    if (!peer->snd_shmq) {
      status_code = MX_STATUS_ENDPOINT_CLOSED;
    } else if (key != peer->snd_shmq->app_key) {
      status_code = MX_STATUS_BAD_KEY;
    }
    mx__connect_complete (ep, q, status_code);
    return MX_SUCCESS;
  }
#endif

  q->connect.basic.mcp_handle = -1;
  q->connect.basic.state = MX__REQUEST_STATE_SEND_QUEUED;

  /* Enqueue in the resend_rreq for consistency with connect_reply (see above)
   * This way, the connect won't be stucked by resources starvation of other
   * message types. */
  mx__enqueue_request(&ep->resend_reqq, q);

  mx__luigi(ep);

  return MX_SUCCESS;
}

/*
 * Start an asynchronous connect to endpoint eid on the NIC nic_id.
 *
 * ep          local endpoint
 * nic_id, eid identify the remote endpoint
 * key         application key presented to the remote endpoint
 * match_info, context
 *             stored in the status of the request
 * request     receives the handle of the connect request on success, and
 *             NULL on failure
 *
 * Returns MX_SUCCESS when the request was set up, the value of mx__error
 * for MX_NO_RESOURCES when no request could be allocated, or the error
 * returned by mx__connect_common.
 */
MX_FUNC(mx_return_t)
mx_iconnect(struct mx_endpoint *ep, uint64_t nic_id, uint32_t eid,
	    uint32_t key, uint64_t match_info,
	    void *context, mx_request_t *request)
{
  mx_request_t q;
  mx_return_t ret;

  MX__MUTEX_LOCK(&ep->lock);
  q = mx__rl_alloc(ep);
  if (!q) {
    ret = mx__error(ep, "mx_iconnect", MX_NO_RESOURCES);
    goto out;
  }
  q->connect.is_synchronous = 0;
  q->connect.basic.status.context = context;
  q->connect.basic.status.match_info = match_info;
  ret = mx__connect_common(ep, nic_id, eid, key, q);
  if (ret != MX_SUCCESS) {
    mx__rl_free(ep, q);
    /* never hand a freed request back to the caller */
    q = NULL;
    goto out;
  }

 out:
  MX__MUTEX_UNLOCK(&ep->lock);
  *request = q;
  return ret;
}

/*
 * Synchronously connect to endpoint eid on the NIC nic_id.
 *
 * ep          local endpoint
 * nic_id, eid identify the remote endpoint
 * key         application key presented to the remote endpoint
 * timeout     passed to mx__sleep_on_wait_queue while waiting for the reply
 * addrp       receives the address of the remote endpoint
 *
 * Returns MX_SUCCESS, MX_BAD_CONNECTION_KEY, MX_TIMEOUT or
 * MX_CONNECTION_FAILED depending on the final status of the request, or
 * the error of the allocation / mx__connect_common step when the request
 * could not be started (addrp is not written in that case).
 */
MX_FUNC(mx_return_t)
mx_connect(struct mx_endpoint *ep, uint64_t nic_id, uint32_t eid, 
	   uint32_t key, uint32_t timeout, mx_endpoint_addr_t *addrp)
{
  mx_request_t req;
  mx_return_t ret;
  int final_code;

  MX__MUTEX_LOCK(&ep->lock);

  req = mx__rl_alloc(ep);
  if (!req) {
    ret = mx__error(ep, "mx_connect", MX_NO_RESOURCES);
    goto out;
  }
  req->connect.is_synchronous = 1;

  ret = mx__connect_common(ep, nic_id, eid, key, req);
  if (ret != MX_SUCCESS) {
    mx__rl_free(ep, req);
    goto out;
  }

  /* not completed on the spot: sleep until completion or timeout */
  if (!(req->connect.basic.state & MX__REQUEST_STATE_COMPLETED)) {
    struct mx__wait_queue waiter;

    req->connect.basic.wq = &waiter;
    mx__sleep_on_wait_queue(ep, &waiter, timeout, &ep->wait_waiters,
			    &ep->wait_queue_head);
    req->connect.basic.wq = NULL;
  }

  /* Either the sleep timeout expired or the request is COMPLETED.
   * In the former case the MCP may not have finished sending yet:
   * busy loop for a while since it won't be long. */
  while (req->connect.basic.state
	 & (MX__REQUEST_STATE_MCP | MX__REQUEST_STATE_SEND_QUEUED)) {
    mx__luigi(ep);
  }

  if (!(req->connect.basic.state & MX__REQUEST_STATE_COMPLETED)) {
    /* the remote node did not reply, set status to timeout */
    req->connect.basic.status.code = mx__error_req(ep, "mx_connect", req, MX_STATUS_TIMEOUT);
    req->connect.basic.state |= MX__REQUEST_STATE_COMPLETED;
    /* the request is still queued for resend: take it out and account
     * for it in connect_count like a normally completed one */
    mx__spliceout_request(&ep->resend_list, req);
    ep->connect_count++;
  }

  *addrp = req->connect.basic.status.source;

  /* translate the request status into an API return code */
  final_code = req->connect.basic.status.code;
  if (final_code == MX_STATUS_SUCCESS)
    ret = MX_SUCCESS;
  else if (final_code == MX_STATUS_BAD_KEY)
    ret = MX_BAD_CONNECTION_KEY;
  else if (final_code == MX_STATUS_ENDPOINT_CLOSED)
    ret = MX_CONNECTION_FAILED;
  else if (final_code == MX_STATUS_TIMEOUT)
    ret = MX_TIMEOUT;
  else
    ret = MX_CONNECTION_FAILED;

  ep->connect_count--;
  mx__rl_free(ep, req);

 out:
#if MX_DEBUG
  if (mx__opt.matter_debug >= 3)
    mx__conservation_of_matter(ep);
#endif
  MX__MUTEX_UNLOCK(&ep->lock);
  return ret;
}

/*
 * Build (but do not queue) a CONNECT request towards an already known
 * partner, reusing the application key and the connect sequence number
 * stored in the partner.
 *
 * ep       local endpoint
 * partner  partner to reconnect to
 * reqp     receives the new request, or NULL when no request is needed
 *          because the partner is this endpoint (unless
 *          mx__opt.disable_self) or, when built with MX_USE_SHMEM, has our
 *          own peer index (unless mx__opt.disable_shmem)
 *
 * Returns MX_SUCCESS, or the value of mx__error for MX_NO_RESOURCES when
 * no request could be allocated (*reqp is not written in that case).
 * The request state is left at 0; the caller is expected to queue it.
 */
mx_return_t
mx__reconnect_partner(struct mx_endpoint *ep, struct mx__partner * partner,
		      union mx_request ** reqp)
{
  union mx_request * q;

  if ((!mx__opt.disable_self && partner == ep->myself)
#if MX_USE_SHMEM
      || (!mx__opt.disable_shmem && partner->peer_index_n == ep->myself->peer_index_n)
#endif
    ) {
    *reqp = NULL;
    return MX_SUCCESS;
  }

  q = mx__rl_alloc(ep);
  if (!q)
    return mx__error(ep, "mx__reconnect_partner", MX_NO_RESOURCES);

  q->connect.is_synchronous = 0;
  q->connect.basic.status.match_info = 0;
  q->connect.basic.status.context = NULL;
  q->connect.basic.wq = NULL;
  q->connect.basic.type = MX__REQUEST_TYPE_CONNECT;
  q->connect.basic.state = 0;
  /* initialize the same header fields as mx__connect_common does, so the
   * request never carries a stale value from a previous use */
  q->connect.basic.requeued = 0;
  q->connect.basic.acquired_by_wait_any = 0;
  q->connect.dest_session_n = ep->endpoint_sid_n;
  q->connect.app_key_n = partner->app_key_n;
  /* reuse the current connect sequence number (no increment here) */
  q->connect.connect_seqnum_n = partner->connect_sendseq;
  /* as in mx__connect_common: advertise our current receive sequence */
  q->connect.seqnum_start = partner->recv_seq;
  q->connect.peer_endpoint_id = partner->eid;
  q->connect.peer_index_n = partner->peer_index_n;
  q->connect.basic.mcp_handle = -1;
  q->connect.basic.partner = partner;
  mx__partner_to_addr(partner, &q->basic.status.source);

  *reqp = q;
  return MX_SUCCESS;
}

/*
 * Disconnect from the endpoint designated by addr.
 *
 * With the endpoint lock held, mx__luigi is run once and the partner
 * behind addr is then cleaned up with the disconnect flag of
 * mx__partner_cleanup set to 1.  Always returns MX_SUCCESS.
 *
 * See the bottom of mx__partner.c for documentation about disconnection.
 */
mx_return_t
mx_disconnect(mx_endpoint_t ep, mx_endpoint_addr_t addr)
{
  struct mx__partner *peer;

  MX__MUTEX_LOCK(&ep->lock);

  mx__luigi(ep);
  peer = mx__partner_from_addr(&addr);
  mx__partner_cleanup(ep, peer, 1);

  MX__MUTEX_UNLOCK(&ep->lock);

  return MX_SUCCESS;
}
